Here we’ll plot all the CCF histograms along with annotations of subclonal drivers. We present a subset of cases in Figure 1 and provide all the plots in Supplementary Figure 1. We’ll first read in the MAF CCF file we processed earlier.
library(data.table)
library(tidyverse)
# Load the per-mutation CCF table written by the earlier processing step and
# drop the "clearcell" suffix from the histology labels. The pattern is a
# regular expression, so the leading "." matches any single character in
# front of "clearcell", not only a literal dot.
maf_ccf <- mutate(
  fread("processed_data/maf_subclone_ccf.csv.gz"),
  histology_abbreviation = str_remove_all(histology_abbreviation, ".clearcell")
)
library(ggrepel)
library(cowplot)
library(gtools)
CCF histograms
library(ggrepel)
library(cowplot)
library(gtools)
# Assign every sample (ordered by histology) to a page of `chunk_size` samples
# so that each page can be drawn as its own faceted figure. `idx` is the
# zero-based offset of the page (0, 100, 200, ...) stored as a string, because
# it is used to name the elements of `sample_ids_chunk`.
#
# floor((n - 1) / chunk_size) places rows 1-100 on page 0, rows 101-200 on
# page 100, and so on, which is exactly what the "## <idx + 1>-<idx + 100>"
# headers printed below advertise. The previous round(n / 100) put only rows
# 1-50 on page 0 and centred every later page on a multiple of 100, so the
# headers and the plotted samples disagreed.
chunk_size <- 100
sample_to_idx <- distinct(maf_ccf, Tumor_Sample_Barcode, histology_abbreviation) %>%
  arrange(histology_abbreviation) %>%
  mutate(idx = paste0(floor((row_number() - 1) / chunk_size) * chunk_size))
sample_ids_chunk <- split(sample_to_idx$Tumor_Sample_Barcode, sample_to_idx$idx)

# Colour/fill palette shared by every page: one Dark2 colour per cluster label.
cluster_palette <- RColorBrewer::brewer.pal(6, "Dark2")
cluster_breaks <- paste0("cluster", 1:6)

# mixedsort() orders the page names numerically ("0", "100", "200", ...)
# rather than lexicographically.
for (ct in mixedsort(names(sample_ids_chunk))) {
  # Markdown sub-heading for this page of samples.
  cat(paste0('## ', as.numeric(ct) + 1, '-', as.numeric(ct) + chunk_size, ' \n'))
  samples <- sample_ids_chunk[[ct]]

  # All mutations for the samples on this page, and the subset flagged as
  # subclonal drivers (used for the labels and dashed marker lines).
  chunk_muts <- maf_ccf %>%
    filter(Tumor_Sample_Barcode %in% samples)
  dfdrivers <- chunk_muts %>%
    filter(issubclonaldriver == TRUE)

  # Stacked CCF histogram per sample, coloured by cluster.
  g <- ggplot(chunk_muts, aes(x = ccf, fill = cluster)) +
    geom_histogram(bins = 100, position = "stack", alpha = 0.6) +
    facet_wrap(histology_abbreviation~icgc_sample_id, scales = "free_y")

  # Driver annotations are only added when this page has at least one driver;
  # the layers are added before the scales/theme so the drawing order is
  # unchanged from the fully spelled-out plot.
  if (nrow(dfdrivers) > 0) {
    g <- g +
      geom_text_repel(data = dfdrivers,
                      aes(label = gene, y = Inf, x = ccf), col = "black",
                      alpha = 0.85, size = 2.0) +
      geom_vline(data = dfdrivers,
                 aes(xintercept = ccf, col = cluster), lty = 2, size = 0.25,
                 show.legend = FALSE)
  }

  g <- g +
    scale_color_manual(values = cluster_palette, breaks = cluster_breaks) +
    scale_fill_manual(values = cluster_palette, breaks = cluster_breaks) +
    theme_cowplot(font_size = 11) +
    scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
    scale_x_continuous(breaks = c(0.0, 1.0, 2.0), limits = c(0, 2)) +
    panel_border() +
    xlab("CCF") +
    ylab("Counts")
  print(g)

  # Blank lines so the next heading starts a new markdown block.
  cat(' \n \n')
}
1-100
101-200
201-300
301-400
401-500
501-600
601-700
701-800
801-900
901-1000
1001-1100
1101-1200
1201-1300
1301-1400
## png
## 2
Figure 1
The IDs for the 41 cases we include in Figure 1 are available in PCAWG_subclonal_drivers.csv. We’ll now pull these out and plot the CCF histograms. This is Figure 1 in our paper.
# Cases shown in Figure 1: rows of the metadata table flagged include == "Y".
md <- fread("metadata/PCAWG_subclonal_drivers.csv") %>%
  filter(include == "Y")
samples_to_include <- md$Tumor_Sample_Barcode

# Subclonal driver mutations for the selected cases, with an extra column `x`
# giving the horizontal anchor for each gene label. Within a sample the
# drivers are ordered by CCF; the lowest-CCF driver has its label anchored 0.4
# to the left of its own CCF (clamped at 0) and every other driver 0.3 to the
# right.
#
# pmax() is used so the clamp is applied row by row. The earlier max() call
# collapsed the whole group to a single value, so in samples with several
# drivers the first label was anchored relative to the *largest* CCF in the
# sample rather than to its own CCF.
dfdrivers <- maf_ccf %>%
  mutate(cluster = str_remove(cluster, "cluster")) %>%
  filter(issubclonaldriver == TRUE) %>%
  filter(Tumor_Sample_Barcode %in% samples_to_include) %>%
  arrange(Tumor_Sample_Barcode, ccf) %>%
  group_by(Tumor_Sample_Barcode) %>%
  mutate(x = row_number()) %>%
  mutate(x = ifelse(x == 1, pmax(ccf - 0.4, 0.0), ccf + 0.3)) %>%
  ungroup()  # do not leak the grouping into later uses of dfdrivers

# Stacked CCF histogram per case (cluster labels shortened to "1".."6"),
# with a dashed vertical line at each driver's CCF and the gene name placed
# at the top of the panel.
g <- maf_ccf %>%
  mutate(cluster = str_remove(cluster, "cluster")) %>%
  filter(Tumor_Sample_Barcode %in% samples_to_include) %>%
  ggplot(aes(x = ccf, fill = cluster)) +
  geom_histogram(bins = 100, position = "stack", alpha = 0.6, size = 0.0) +
  facet_wrap(~icgc_sample_id, scales = "free_y") +
  geom_text_repel(data = dfdrivers,
                  force = 1,
                  aes(label = gene, y = Inf, x = x), col = "black",
                  alpha = 0.85, size = 1.5) +
  geom_vline(data = dfdrivers,
             aes(xintercept = ccf, col = cluster), lty = 2, size = 0.25,
             show.legend = FALSE) +
  scale_color_manual(values = RColorBrewer::brewer.pal(6, "Dark2"), breaks = paste0(1:6)) +
  scale_fill_manual(values = RColorBrewer::brewer.pal(6, "Dark2"), breaks = paste0(1:6)) +
  theme_cowplot(font_size = 8, line_size = 0.25) +
  # Legend is placed inside the plotting area, near the bottom-right corner.
  theme(legend.position = c(0.9, 0.075)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 3)) +
  scale_x_continuous(breaks = c(0.0, 1.0, 2.0), limits = c(0, 2)) +
  labs(fill = "Cluster") +
  guides(fill = guide_legend(nrow = 2)) +
  panel_border() +
  xlab("CCF") +
  ylab("Counts")
g
Figure 1: 41 cases with subclonal drivers
# Write Figure 1 to disk twice, as a PNG and as a PDF, both 7 wide by 5 high
# (cowplot base dimensions).
save_plot("Figure1.png", g, base_width = 7, base_height = 5)
save_plot("Figure1.pdf", g, base_width = 7, base_height = 5)
For these 41 cases we’ll also pull out the clustering probabilities for each cluster, which are used in one of our supplementary figures.
library(glue)
# Read the cluster-assignment table of one Figure 1 case, drop structural
# variant rows (mut_type == "SV"), tag the rows with the sample ID as the
# first column, and remove the second-breakpoint columns.
read_cluster_assignments <- function(sa) {
  fread(glue("subclones_data/{sa}_cluster_assignments.txt.gz"))[mut_type != "SV"] %>%
    mutate(sample = sa) %>%
    select(sample, everything(), -chromosome2, -position2)
}

# Read every case first and bind once at the end. Growing `dfclust` with
# bind_rows() inside a loop re-copies all previously read rows on every
# iteration. as.data.frame() keeps the result a plain data.frame, as before.
dfclust <- as.data.frame(
  bind_rows(lapply(samples_to_include, read_cluster_assignments))
)
fwrite(dfclust, "processed_data/cluster_probs_41cases.csv.gz")